{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pandas import Series, DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading ../data/san+francisco,ca.csv...\n",
      "Loading ../data/new+york,ny.csv...\n",
      "Loading ../data/springfield,ma.csv...\n",
      "Loading ../data/boston,ma.csv...\n",
      "Loading ../data/springfield,il.csv...\n",
      "Loading ../data/albany,ny.csv...\n",
      "Loading ../data/los+angeles,ca.csv...\n",
      "Loading ../data/chicago,il.csv...\n"
     ]
    }
   ],
   "source": [
    "# Load min-temp and max-temp data from multiple cities\n",
    "# Turn all of that data into a single data frame with state, city, date, min, and max temp\n",
    "\n",
    "import glob\n",
    "\n",
    "all_dfs = [] \n",
    "\n",
    "for one_filename in glob.glob('../data/*,*.csv'): \n",
    "    print(f'Loading {one_filename}...')\n",
    "\n",
    "    city, state = (\n",
    "        one_filename\n",
    "        .removeprefix('../data/')\n",
    "        .removesuffix('.csv')\n",
    "        .split(',')\n",
    "    )\n",
    "\n",
    "\n",
    "    one_df = (\n",
    "        pd\n",
    "        .read_csv(one_filename,\n",
    "                  usecols=[0, 1, 2], \n",
    "                  names=['date_time',\n",
    "                         'max_temp',\n",
    "                         'min_temp'], \n",
    "                  header=0) \n",
    "        .assign(city=city.replace('+', ' ').title(), \n",
    "                state=state.upper()) \n",
    "    )\n",
    "\n",
    "    all_dfs.append(one_df) \n",
    "\n",
    "df = pd.concat(all_dfs) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "state  city         \n",
       "CA     Los Angeles      2018-12-11 00:00:00\n",
       "       San Francisco    2018-12-11 00:00:00\n",
       "IL     Chicago          2018-12-11 00:00:00\n",
       "       Springfield      2018-12-11 00:00:00\n",
       "MA     Boston           2018-12-11 00:00:00\n",
       "       Springfield      2018-12-11 00:00:00\n",
       "NY     Albany           2018-12-11 00:00:00\n",
       "       New York         2018-12-11 00:00:00\n",
       "Name: date_time, dtype: object"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Does this data set reflect the same time period for each city and state?\n",
    "df.groupby(['state', 'city'])['date_time'].min().sort_values()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "state  city         \n",
       "CA     Los Angeles      2019-03-11 21:00:00\n",
       "       San Francisco    2019-03-11 21:00:00\n",
       "IL     Chicago          2019-03-11 21:00:00\n",
       "       Springfield      2019-03-11 21:00:00\n",
       "MA     Boston           2019-03-11 21:00:00\n",
       "       Springfield      2019-03-11 21:00:00\n",
       "NY     Albany           2019-03-11 21:00:00\n",
       "       New York         2019-03-11 21:00:00\n",
       "Name: date_time, dtype: object"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(['state', 'city'])['date_time'].max().sort_values()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "state  city         \n",
       "CA     Los Angeles       4\n",
       "       San Francisco     3\n",
       "IL     Chicago         -28\n",
       "       Springfield     -25\n",
       "MA     Boston          -14\n",
       "       Springfield     -20\n",
       "NY     Albany          -19\n",
       "       New York        -14\n",
       "Name: min_temp, dtype: int64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# What is the lowest min temperature in each city during our time period?\n",
    "df.groupby(['state', 'city'])['min_temp'].min()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "state\n",
       "CA    23\n",
       "IL    16\n",
       "MA    17\n",
       "NY    15\n",
       "Name: max_temp, dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# What is the highest max temperature in each *state* during our time period?\n",
    "df.groupby('state')['max_temp'].max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
